Libraries used:
####################################################### SKIP THIS CELL #######################################################
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import ipywidgets as widgets
import sklearn.model_selection
from datetime import datetime
from datetime import timedelta
from sklearn.svm import SVR
from IPython.display import display, HTML
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
The dataset used is from this GitHub repository provided by Johns Hopkins University and is updated every day.
## Data selection - build one clean DataFrame per daily report, keeping only 5 columns
import os

dates = pd.date_range(start="01-22-2020", end="05-22-2020").to_pydatetime()
col1 = "Province/State"
col2 = "Country/Region"
col3 = "Confirmed"
col4 = "Deaths"
col5 = "Recovered"
column_names = [col1, col2, col3, col4, col5]

# Some report files spell the two location columns with underscores instead.
legacy_names = {col1: "Province_State", col2: "Country_Region"}

## standardised the country names (both Congo entries are merged on purpose)
country_aliases = {
    'Mainland China': 'China',
    'Iran (Islamic Republic of)': 'Iran',
    'Congo (Brazzaville)': 'Congo',
    'Congo (Kinshasa)': 'Congo',
}

## remove cases from cruise ship
cruise_ships = ['Grand Princess', 'Diamond Princess']

# Maps "mm-dd-YYYY" -> cleaned DataFrame for that day, in chronological order.
fullCleanDataset = {}
for report_date in dates:
    date_key = report_date.strftime("%m-%d-%Y")
    # os.path.join instead of a hard-coded "\\" so the path also resolves off Windows.
    filePath = os.path.join("csse_covid_19_daily_reports", "%s.csv" % date_key)
    df = pd.read_csv(filePath)
    df = df.fillna(0)

    cleanDataset = pd.DataFrame(columns=column_names)
    ## standardised the column names
    for wanted in (col1, col2):
        source = wanted if wanted in df.columns else legacy_names[wanted]
        cleanDataset[wanted] = df[source]
    cleanDataset[col3] = df[col3]
    cleanDataset[col4] = df[col4]
    cleanDataset[col5] = df[col5]

    for alias, canonical in country_aliases.items():
        cleanDataset.loc[cleanDataset[col2] == alias, col2] = canonical

    cleanDataset.drop(
        cleanDataset[cleanDataset[col1].isin(cruise_ships)].index,
        inplace=True,
    )
    fullCleanDataset[date_key] = cleanDataset
## Data transformation - aggregating data
## Data cleaning - remove noises
def getCountry(fullCleanDataset, country):
    """Build the day-by-day summary table for a single country.

    Parameters
    ----------
    fullCleanDataset : mapping of key -> daily DataFrame, walked in iteration order.
    country : value compared against the 'Country/Region' column.

    Returns
    -------
    int64 array with one row per day and the columns
    [total confirmed, total deaths, total recovered,
     new confirmed, new deaths, new recovered, active].
    """
    summary = np.zeros((len(fullCleanDataset), 7), dtype=np.int64)
    for day, key in enumerate(fullCleanDataset):
        frame = fullCleanDataset[key]
        rows = frame.loc[frame['Country/Region'] == country]
        # Positional columns 2, 3 and 4 are summed as confirmed, deaths, recovered.
        confirmed, deaths, recovered = (
            rows.iloc[:, position].sum(axis=0) for position in (2, 3, 4)
        )
        # Active uses the raw totals, before the clamping applied below.
        active = confirmed - deaths - recovered
        if day > 0:
            previous = summary[day - 1]
            # A cumulative total is never allowed to fall below the previous day.
            confirmed = max(confirmed, previous[0])
            deaths = max(deaths, previous[1])
            recovered = max(recovered, previous[2])
            deltas = (
                confirmed - previous[0],
                deaths - previous[1],
                recovered - previous[2],
            )
        else:
            deltas = (0, 0, 0)
        summary[day] = [confirmed, deaths, recovered, *deltas, active]
    return summary
def getContinent(fullCleanDataset, continent):
    """Build the day-by-day summary table for a group of countries.

    Rows are selected with a single ``isin`` filter instead of repeated
    ``DataFrame.append`` calls, because ``append`` no longer exists in
    pandas 2.x. A country listed twice in `continent` is counted once.

    Parameters
    ----------
    fullCleanDataset : mapping of key -> daily DataFrame, walked in iteration order.
    continent : iterable of names compared against the 'Country/Region' column.

    Returns
    -------
    int64 array with one row per day and the columns
    [total confirmed, total deaths, total recovered,
     new confirmed, new deaths, new recovered, active].
    """
    members = list(continent)
    summary = np.zeros((len(fullCleanDataset), 7), dtype=np.int64)
    for day, key in enumerate(fullCleanDataset):
        frame = fullCleanDataset[key]
        rows = frame.loc[frame['Country/Region'].isin(members)]
        # Positional columns 2, 3 and 4 are summed as confirmed, deaths, recovered.
        totalConfirmed = rows.iloc[:, 2].sum(axis=0)
        totalDeaths = rows.iloc[:, 3].sum(axis=0)
        totalRecovered = rows.iloc[:, 4].sum(axis=0)
        # Active uses the raw totals, before the clamping applied below.
        active = totalConfirmed - totalDeaths - totalRecovered
        if day == 0:
            newConfirmed = newDeaths = newRecovered = 0
        else:
            previous = summary[day - 1]
            # Prevent total count from decreasing
            totalConfirmed = max(totalConfirmed, previous[0])
            totalDeaths = max(totalDeaths, previous[1])
            totalRecovered = max(totalRecovered, previous[2])
            newConfirmed = totalConfirmed - previous[0]
            newDeaths = totalDeaths - previous[1]
            newRecovered = totalRecovered - previous[2]
        summary[day] = [totalConfirmed, totalDeaths, totalRecovered,
                        newConfirmed, newDeaths, newRecovered, active]
    return summary
def getProvince(fullCleanDataset, province):
    """Build the day-by-day summary table for one province.

    Rows are selected where 'Province/State' equals `province`. The result
    is an int64 array with one row per day and the columns
    [total confirmed, total deaths, total recovered,
     new confirmed, new deaths, new recovered, active].
    """
    n_days = len(fullCleanDataset)
    summary = np.zeros((n_days, 7), dtype=np.int64)
    row = 0
    for key in fullCleanDataset:
        snapshot = fullCleanDataset[key]
        matching = snapshot.loc[snapshot['Province/State'] == province]
        total_c = matching.iloc[:, 2].sum(axis=0)
        total_d = matching.iloc[:, 3].sum(axis=0)
        total_r = matching.iloc[:, 4].sum(axis=0)
        # Active is taken from the unclamped totals.
        active = total_c - total_d - total_r

        new_c = new_d = new_r = 0
        if row != 0:
            last_c, last_d, last_r = summary[row - 1, :3]
            # Keep cumulative totals from decreasing between days.
            if total_c < last_c:
                total_c = last_c
            if total_d < last_d:
                total_d = last_d
            if total_r < last_r:
                total_r = last_r
            new_c = total_c - last_c
            new_d = total_d - last_d
            new_r = total_r - last_r

        summary[row] = [total_c, total_d, total_r, new_c, new_d, new_r, active]
        row += 1
    return summary
def getState(fullCleanDataset, state):
    """Build the day-by-day summary table for one state.

    Rows are selected where 'Province/State' equals `state`. Returns an
    int64 array, one row per day, laid out as
    [total confirmed, total deaths, total recovered,
     new confirmed, new deaths, new recovered, active].
    """
    summary = np.zeros((len(fullCleanDataset), 7), dtype=np.int64)
    for position, key in zip(range(len(fullCleanDataset)), fullCleanDataset):
        daily = fullCleanDataset[key]
        daily = daily.loc[daily['Province/State'] == state]
        sums = [daily.iloc[:, column].sum(axis=0) for column in (2, 3, 4)]
        # Computed from the raw sums, so it can differ from the clamped totals.
        active = sums[0] - sums[1] - sums[2]

        if position == 0:
            fresh = [0, 0, 0]
        else:
            earlier = summary[position - 1]
            # Prevent total count from decreasing
            sums = [
                earlier[slot] if sums[slot] < earlier[slot] else sums[slot]
                for slot in range(3)
            ]
            fresh = [sums[slot] - earlier[slot] for slot in range(3)]

        summary[position] = sums + fresh + [active]
    return summary
def convertToDF(dataset):
    """Wrap a 7-column summary array in a labelled DataFrame.

    The index holds consecutive "mm-dd-YYYY" strings starting at
    01-22-2020, one per row of `dataset`.
    """
    labels = [
        "Total Confirmed",
        "Total Deaths",
        "Total Recovered",
        "Daily Confirmed",
        "Daily Deaths",
        "Daily Recovered",
        "Active",
    ]
    stamps = pd.date_range(start="01-22-2020", periods=len(dataset)).to_pydatetime()
    # The object array is overwritten in place: datetimes become formatted strings.
    for slot, stamp in enumerate(stamps):
        stamps[slot] = stamp.strftime("%m-%d-%Y")
    return pd.DataFrame(dataset, index=stamps, columns=labels)
####################################################### SKIP THIS CELL #######################################################
## getting data for countries
top_countries = [
    'US', 'Russia', 'Brazil', 'United Kingdom', 'Spain', 'Italy',
    'France', 'Germany', 'Turkey', 'Iran', 'China', 'India',
]
# Country name -> summary array produced by getCountry
top_countries_DB = dict.fromkeys(top_countries)
for country in top_countries_DB:
    top_countries_DB[country] = getCountry(fullCleanDataset, country)
################################################################################################################################
## dropdown for statistic
summaryDD = widgets.Dropdown(description='Country: ', options=top_countries)


def on_change(change):
    """Show descriptive statistics for the country newly selected in summaryDD."""
    value_changed = change['type'] == 'change' and change['name'] == 'value'
    if not value_changed:
        return
    clear_output()
    display(summaryDD)
    stats = convertToDF(getCountry(fullCleanDataset, change['new']))
    display(stats.describe())


summaryDD.observe(on_change)
################################################################################################################################
## function
def plotGraph(fig, db, list, index, chart):
    """Add one trace per name in `list` to `fig`, reading column `index` of db[name].

    With chart == 'lines' every trace is added with default visibility.
    With chart == 'bar' only the first trace is shown; the others are added
    as 'legendonly'. Any other `chart` value adds nothing.
    """
    if chart == 'lines':
        for name in list:
            addTrace(fig, name, db[name], index, chart)
    elif chart == 'bar':
        for position, name in enumerate(list):
            if position:
                addTrace(fig, name, db[name], index, chart, 'legendonly')
            else:
                addTrace(fig, name, db[name], index, chart)
## Initialise all figures and corresponding dropdowns
# Shared x-axis labels ("Jan 22", "Jan 23", ...), one per day in the range
x_dates = pd.date_range(start="01-22-2020", end="05-22-2020").to_pydatetime()
for i, stamp in enumerate(x_dates):
    x_dates[i] = stamp.strftime("%b %d")
def addTrace(fig, country, dataset, index, chart, visible=True):
    """Add column `index` of `dataset` to `fig` as a trace named `country`.

    chart == 'bar' adds a go.Bar and honours `visible`; any other value adds
    a go.Scatter with `chart` as its mode (`visible` is then ignored).
    The module-level x_dates supplies the x values.
    """
    series = dataset[:, index]
    if chart == 'bar':
        trace = go.Bar(name=country, x=x_dates, y=series, visible=visible)
    else:
        trace = go.Scatter(name=country, x=x_dates, y=series, mode=chart)
    fig.add_trace(trace)
figDD1 = widgets.Dropdown(
    description='Graph type: ',
    options=[('linear', 'linear'), ('logarithmic', 'log')],
)


def fig_on_change(change):
    """Redraw fig1 with the y-axis scale selected in figDD1."""
    if change['type'] != 'change' or change['name'] != 'value':
        return
    clear_output()
    display(figDD1)
    fig1.update_yaxes(type=change['new'])
    fig1.show()


figDD1.observe(fig_on_change)

# Summary column 0 (total confirmed), one line per country
fig1 = go.Figure()
plotGraph(fig1, top_countries_DB, top_countries, 0, 'lines')
fig1.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig1.update_layout(
    hovermode="x unified",
    legend_title_text='Country',
    title="Total Confirmed Cases",
    xaxis_title="Dates",
    yaxis_title="Total Confirmed Cases",
)
# Summary column 3 (daily confirmed) as bars; only the first country starts visible
fig2 = go.Figure()
plotGraph(fig2, top_countries_DB, top_countries, 3, 'bar')
fig2.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig2.update_layout(**{
    'title': "Daily Confirmed Cases",
    'xaxis_title': "Dates",
    'yaxis_title': "Daily Confirmed Cases",
    'legend_title_text': 'Country',
    'hovermode': "x unified",
})
fig3DD = widgets.Dropdown(
    description='Graph type: ',
    options=[('linear', 'linear'), ('logarithmic', 'log')],
)


def fig3_on_change(change):
    """Redraw fig3 with the y-axis scale selected in fig3DD."""
    if change['type'] != 'change' or change['name'] != 'value':
        return
    clear_output()
    display(fig3DD)
    fig3.update_yaxes(type=change['new'])
    fig3.show()


fig3DD.observe(fig3_on_change)

# Summary column 1 (total deaths), one line per country
fig3 = go.Figure()
plotGraph(fig3, top_countries_DB, top_countries, 1, 'lines')
fig3.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig3.update_layout(
    hovermode="x unified",
    legend_title_text='Country',
    title="Total Deaths Cases",
    xaxis_title="Dates",
    yaxis_title="Total Deaths Cases",
)
# Summary column 4 (daily deaths) as bars; only the first country starts visible
fig4 = go.Figure()
plotGraph(fig4, top_countries_DB, top_countries, 4, 'bar')
fig4.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig4.update_layout(**{
    'title': "Daily Deaths Cases",
    'xaxis_title': "Dates",
    'yaxis_title': "Daily Deaths Cases",
    'legend_title_text': 'Country',
    'hovermode': "x unified",
})
fig5DD = widgets.Dropdown(
    description='Graph type: ',
    options=[('linear', 'linear'), ('logarithmic', 'log')],
)


def fig5_on_change(change):
    """Redraw fig5 with the y-axis scale selected in fig5DD."""
    if change['type'] != 'change' or change['name'] != 'value':
        return
    clear_output()
    display(fig5DD)
    fig5.update_yaxes(type=change['new'])
    fig5.show()


fig5DD.observe(fig5_on_change)

# Summary column 2 (total recovered), one line per country
fig5 = go.Figure()
plotGraph(fig5, top_countries_DB, top_countries, 2, 'lines')
fig5.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig5.update_layout(
    hovermode="x unified",
    legend_title_text='Country',
    title="Total Recovered Cases",
    xaxis_title="Dates",
    yaxis_title="Total Recovered Cases",
)
# Summary column 5 (daily recovered) as bars; only the first country starts visible
fig6 = go.Figure()
plotGraph(fig6, top_countries_DB, top_countries, 5, 'bar')
fig6.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig6.update_layout(**{
    'title': "Daily Recovered Cases",
    'xaxis_title': "Dates",
    'yaxis_title': "Daily Recovered Cases",
    'legend_title_text': 'Country',
    'hovermode': "x unified",
})
# Summary column 6 (active cases), one line per country
fig7 = go.Figure()
plotGraph(fig7, top_countries_DB, top_countries, 6, 'lines')
fig7.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig7.update_layout(
    hovermode="x unified",
    legend_title_text='Country',
    title="Active Cases",
    xaxis_title="Dates",
    yaxis_title="Active Cases",
)
# Argument-less display() kept as the cell's final statement.
display()
Please select a country in the dropdown to view its statistics. Note that the descriptive statistics for total confirmed, total death and total recovered cases do not provide valuable information, because these columns are cumulative (aggregated) data.
display(summaryDD)
# Initial table shown before any dropdown selection: statistics for the US
df = convertToDF(getCountry(fullCleanDataset, 'US'))
display(df.describe())
This section displays the total number of confirmed cases. To switch between graph types, use the dropdown. To show or hide a country, click on its name in the legend. This is an interactive graph.
display(figDD1)
fig1.show()
This section displays the daily number of confirmed cases. To view different countries, please click on the names. This is an interactive graph.
fig2.show()
display(fig3DD)
fig3.show()
This section displays the daily number of death cases. To view different countries, please click on the names. This is an interactive graph.
fig4.show()
display(fig5DD)
fig5.show()
This section displays the daily number of recovered cases. To view different countries, please click on the names. This is an interactive graph.
fig6.show()
This section displays the total number of active cases. To view different countries, please click on the names. This is an interactive graph.
fig7.show()
In this section, we compare the number of cases by continent. The continents are split into Asia, Africa, Europe, North America, South America and Oceania. The data was obtained from this website.
####################################################### SKIP THIS CELL #######################################################
## getting data for continents
continents = ['Asia', 'Africa', 'Europe', 'North America', 'South America', 'Oceania']

# Country names are matched verbatim against the 'Country/Region' column.
# A comma was missing after 'Hong Kong', which silently fused it with 'East Timor'
# into the single name 'Hong KongEast Timor'.
# 'Russia' is listed next to 'Russian Federation' because top_countries queries the
# same column with the spelling 'Russia' - NOTE(review): confirm both spellings occur.
Asia = [
    'Afghanistan', 'Bahrain', 'Bangladesh', 'Bhutan', 'Brunei', 'Burma', 'Cambodia', 'China', 'Hong Kong',
    'East Timor', 'India', 'Indonesia', 'Iran', 'Iraq', 'Israel', 'Japan', 'Jordan', 'Kazakhstan', 'Korea, North',
    'Korea, South', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Lebanon', 'Malaysia', 'Maldives', 'Mongolia', 'Nepal', 'Oman',
    'Pakistan', 'Philippines', 'Qatar', 'Russia', 'Russian Federation', 'Saudi Arabia', 'Singapore', 'Sri Lanka',
    'Syria', 'Taiwan', 'Tajikistan', 'Thailand', 'Turkey', 'Turkmenistan', 'United Arab Emirates', 'Uzbekistan',
    'Vietnam', 'Yemen',
]
Africa = [
    'Algeria', 'Angola', 'Benin', 'Botswana', 'Burkina Faso', 'Burundi', 'Cameroon', 'Cape Verde',
    'Central African Republic', 'Chad', 'Comoros', 'Congo', 'Djibouti', 'Egypt', 'Equatorial Guinea', 'Eritrea',
    'Ethiopia', 'Gabon', 'Gambia', 'Ghana', 'Guinea', 'Guinea-Bissau', 'Ivory Coast', 'Kenya', 'Lesotho', 'Liberia',
    'Libya', 'Madagascar', 'Malawi', 'Mali', 'Mauritania', 'Mauritius', 'Morocco', 'Mozambique', 'Namibia', 'Niger',
    'Nigeria', 'Rwanda', 'Sao Tome and Principe', 'Senegal', 'Seychelles', 'Sierra Leone', 'Somalia', 'South Africa',
    'South Sudan', 'Sudan', 'Swaziland', 'Tanzania', 'Togo', 'Tunisia', 'Uganda', 'Zambia', 'Zimbabwe',
]
Europe = [
    'Albania', 'Andorra', 'Armenia', 'Austria', 'Azerbaijan', 'Belarus', 'Belgium', 'Bosnia and Herzegovina',
    'Bulgaria', 'Croatia', 'Cyprus', 'CZ', 'Denmark', 'Estonia', 'Finland', 'France', 'Georgia', 'Germany', 'Greece',
    'Hungary', 'Iceland', 'Ireland', 'Italy', 'Latvia', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Macedonia',
    'Malta', 'Moldova', 'Monaco', 'Montenegro', 'Netherlands', 'Norway', 'Poland', 'Portugal', 'Romania',
    'San Marino', 'Serbia', 'Slovakia', 'Slovenia', 'Spain', 'Sweden', 'Switzerland', 'Ukraine', 'United Kingdom',
    'Vatican City',
]
NorthAmerica = [
    'Antigua and Barbuda', 'Bahamas', 'Barbados', 'Belize', 'Canada', 'Costa Rica', 'Cuba', 'Dominica',
    'Dominican Republic', 'El Salvador', 'Grenada', 'Guatemala', 'Haiti', 'Honduras', 'Jamaica', 'Mexico',
    'Nicaragua', 'Panama', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the Grenadines',
    'Trinidad and Tobago', 'US',
]
SouthAmerica = [
    'Argentina', 'Bolivia', 'Brazil', 'Chile', 'Colombia', 'Ecuador', 'Guyana', 'Paraguay', 'Peru', 'Suriname',
    'Uruguay', 'Venezuela',
]
Oceania = [
    'Australia', 'Fiji', 'Kiribati', 'Marshall Islands', 'Micronesia', 'Nauru', 'New Zealand', 'Palau',
    'Papua New Guinea', 'Samoa', 'Solomon Islands', 'Tonga', 'Tuvalu', 'Vanuatu',
]
# Continent display name -> list of member country names
continentsHash = {'Asia': Asia, 'Africa': Africa, 'Europe': Europe, 'North America': NorthAmerica,
                  'South America': SouthAmerica, 'Oceania': Oceania}
# Continent name -> summary array produced by getContinent
continents_DB = {}
for continent in continents:
    continents_DB[continent] = getContinent(fullCleanDataset, continentsHash[continent])

# Reuse the arrays computed above instead of running six more full passes over the
# dataset; each name refers to the same array object stored in continents_DB.
asia = continents_DB['Asia']
africa = continents_DB['Africa']
europe = continents_DB['Europe']
northAmerica = continents_DB['North America']
southAmerica = continents_DB['South America']
oceania = continents_DB['Oceania']
################################################################################################################################
## dropdown for statistic
summaryDD2 = widgets.Dropdown(description='Continent: ', options=continents)


def on_change2(change):
    """Show descriptive statistics for the continent newly selected in summaryDD2."""
    value_changed = change['type'] == 'change' and change['name'] == 'value'
    if not value_changed:
        return
    clear_output()
    display(summaryDD2)
    # Summaries are read from the precomputed continents_DB.
    stats = convertToDF(continents_DB[change['new']])
    display(stats.describe())


summaryDD2.observe(on_change2)
################################################################################################################################
## Initialise all figures and corresponding dropdowns
fig1bDD = widgets.Dropdown(
    options=[('linear', 'linear'), ('logarithmic', 'log')],
    description='Graph type: ',
)


def fig1b_on_change(change):
    """Redraw fig1b with the y-axis scale selected in fig1bDD."""
    if change['type'] == 'change' and change['name'] == 'value':
        clear_output()
        display(fig1bDD)
        fig1b.update_yaxes(type=change['new'])
        fig1b.show()


fig1bDD.observe(fig1b_on_change)

# Summary column 0 (total confirmed), one line per continent
fig1b = go.Figure()
plotGraph(fig1b, continents_DB, continents, 0, 'lines')
fig1b.update_xaxes(tickangle=315, tickfont=dict(size=13))
fig1b.update_layout(
    title="Total Confirmed Cases",
    xaxis_title="Dates",
    yaxis_title="Total Confirmed Cases",
    # The traces are continents, so the legend is titled accordingly.
    legend_title_text='Continent',
    hovermode="x unified",
)
# Summary column 3 (daily confirmed) as bars; only the first continent starts visible
fig2b = go.Figure()
plotGraph(fig2b, continents_DB, continents, 3, 'bar')
fig2b.update_xaxes(tickangle=315, tickfont=dict(size=13))
fig2b.update_layout(
    title="Daily Confirmed Cases",
    xaxis_title="Dates",
    yaxis_title="Daily Confirmed Cases",
    # The traces are continents, so the legend is titled accordingly.
    legend_title_text='Continent',
    hovermode="x unified",
)
fig3bDD = widgets.Dropdown(
    options=[('linear', 'linear'), ('logarithmic', 'log')],
    description='Graph type: ',
)


def fig3b_on_change(change):
    """Redraw fig3b with the y-axis scale selected in fig3bDD."""
    if change['type'] == 'change' and change['name'] == 'value':
        clear_output()
        display(fig3bDD)
        fig3b.update_yaxes(type=change['new'])
        fig3b.show()


fig3bDD.observe(fig3b_on_change)

# Summary column 1 (total deaths), one line per continent
fig3b = go.Figure()
plotGraph(fig3b, continents_DB, continents, 1, 'lines')
fig3b.update_xaxes(tickangle=315, tickfont=dict(size=13))
fig3b.update_layout(
    title="Total Deaths Cases",
    xaxis_title="Dates",
    yaxis_title="Total Deaths Cases",
    # The traces are continents, so the legend is titled accordingly.
    legend_title_text='Continent',
    hovermode="x unified",
)
# Summary column 4 (daily deaths) as bars; only the first continent starts visible
fig4b = go.Figure()
plotGraph(fig4b, continents_DB, continents, 4, 'bar')
fig4b.update_xaxes(tickangle=315, tickfont=dict(size=13))
fig4b.update_layout(
    title="Daily Deaths Cases",
    xaxis_title="Dates",
    yaxis_title="Daily Deaths Cases",
    # The traces are continents, so the legend is titled accordingly.
    legend_title_text='Continent',
    hovermode="x unified",
)
fig5bDD = widgets.Dropdown(
    options=[('linear', 'linear'), ('logarithmic', 'log')],
    description='Graph type: ',
)


def fig5b_on_change(change):
    """Redraw fig5b with the y-axis scale selected in fig5bDD."""
    if change['type'] == 'change' and change['name'] == 'value':
        clear_output()
        display(fig5bDD)
        fig5b.update_yaxes(type=change['new'])
        fig5b.show()


fig5bDD.observe(fig5b_on_change)

# Summary column 2 (total recovered), one line per continent
fig5b = go.Figure()
plotGraph(fig5b, continents_DB, continents, 2, 'lines')
fig5b.update_xaxes(tickangle=315, tickfont=dict(size=13))
fig5b.update_layout(
    title="Total Recovered Cases",
    xaxis_title="Dates",
    yaxis_title="Total Recovered Cases",
    # The traces are continents, so the legend is titled accordingly.
    legend_title_text='Continent',
    hovermode="x unified",
)
# Summary column 5 (daily recovered) as bars; only the first continent starts visible
fig6b = go.Figure()
plotGraph(fig6b, continents_DB, continents, 5, 'bar')
fig6b.update_xaxes(tickangle=315, tickfont=dict(size=13))
fig6b.update_layout(
    title="Daily Recovered Cases",
    xaxis_title="Dates",
    yaxis_title="Daily Recovered Cases",
    # The traces are continents, so the legend is titled accordingly.
    legend_title_text='Continent',
    hovermode="x unified",
)
# Summary column 6 (active cases), one line per continent
fig7b = go.Figure()
plotGraph(fig7b, continents_DB, continents, 6, 'lines')
fig7b.update_xaxes(tickangle=315, tickfont=dict(size=13))
fig7b.update_layout(
    title="Active Cases",
    xaxis_title="Dates",
    yaxis_title="Active Cases",
    # The traces are continents, so the legend is titled accordingly.
    legend_title_text='Continent',
    hovermode="x unified",
)
# Argument-less display() kept as the cell's final statement.
display()
Please select a continent in the dropdown to view its statistics. Note that the descriptive statistics for total confirmed, total death and total recovered cases do not provide valuable information, because these columns are cumulative (aggregated) data.
display(summaryDD2)
# Initial table shown before any dropdown selection: statistics for Asia
df = convertToDF(continents_DB['Asia'])
display(df.describe())
display(fig1bDD)
fig1b.show()
This section displays the daily number of confirmed cases. To view different continents, please click on the names. This is an interactive graph.
fig2b.show()
display(fig3bDD)
fig3b.show()
Although North America was hit hardest by the virus, Europe has the highest death count. On May 22, the difference between Europe and North America in total death cases was approximately 54,000.
This section displays the daily number of death cases. To show or hide a continent, click on its name in the legend. This is an interactive graph.
fig4b.show()
display(fig5bDD)
fig5b.show()
This section displays the daily number of recovered cases. To view different continents, please click on the names. This is an interactive graph.
fig6b.show()
This section displays the total number of active cases. To view different continents, please click on the names. This is an interactive graph.
fig7b.show()
In this section, we take a look at the breakdown of the coronavirus cases in China by province, as opposed to as a whole country in previous sections.
####################################################### SKIP THIS CELL #######################################################
# Province names are matched verbatim against the 'Province/State' column,
# so the misspelling 'Yuunan' (for 'Yunnan') could never match any row.
china_provinces = [
    'Hubei', 'Zhejiang', 'Guangdong', 'Henan', 'Hunan', 'Jiangxi', 'Anhui', 'Chongqing', 'Shandong', 'Sichuan',
    'Jiangsu', 'Beijing', 'Shanghai', 'Fujian', 'Guangxi', 'Shaanxi', 'Yunnan', 'Hebei', 'Heilongjiang', 'Hainan',
    'Liaoning', 'Shanxi', 'Tianjin', 'Guizhou', 'Gansu', 'Ningxia', 'Inner Mongolia', 'Xinjiang', 'Qinghai', 'Tibet',
]
# Province name -> summary array produced by getProvince
china_provinces_DB = dict.fromkeys(china_provinces)
for province in china_provinces_DB:
    china_provinces_DB[province] = getProvince(fullCleanDataset, province)
################################################################################################################################
chinaDD = widgets.Dropdown(description='Provinces: ', options=china_provinces)


def on_change_china(change):
    """Show descriptive statistics for the province newly selected in chinaDD."""
    value_changed = change['type'] == 'change' and change['name'] == 'value'
    if not value_changed:
        return
    clear_output()
    display(chinaDD)
    stats = convertToDF(getProvince(fullCleanDataset, change['new']))
    display(stats.describe())


chinaDD.observe(on_change_china)
################################################################################################################################
##function
def plotGraph(fig, db, list, index, chart):
    """Add one trace per name in `list` to `fig`, reading column `index` of db[name].

    'lines' adds every trace with default visibility; 'bar' shows only the
    first trace and adds the others as 'legendonly'. Other values add nothing.
    """
    if chart not in ('lines', 'bar'):
        return
    for position, label in enumerate(list):
        if chart == 'bar' and position > 0:
            addTrace(fig, label, db[label], index, chart, 'legendonly')
        else:
            addTrace(fig, label, db[label], index, chart)


# Shared x-axis labels ("Jan 22", "Jan 23", ...), one per day in the range
x_dates = pd.date_range(start="01-22-2020", end="05-22-2020").to_pydatetime()
for i, stamp in enumerate(x_dates):
    x_dates[i] = stamp.strftime("%b %d")


def addTrace(fig, province, dataset, index, chart, visible=True):
    """Add column `index` of `dataset` to `fig` as a trace named `province`.

    chart == 'bar' adds a go.Bar that honours `visible`; any other value adds
    a go.Scatter using `chart` as its mode.
    """
    shared = dict(name=province, x=x_dates, y=dataset[:, index])
    if chart != 'bar':
        fig.add_trace(go.Scatter(mode=chart, **shared))
        return
    fig.add_trace(go.Bar(visible=visible, **shared))
chinaProvinceDD = widgets.Dropdown(
    description='Graph type: ',
    options=[('linear', 'linear'), ('logarithmic', 'log')],
)


def fig8_on_change(change):
    """Redraw fig8 with the y-axis scale selected in chinaProvinceDD."""
    if change['type'] != 'change' or change['name'] != 'value':
        return
    clear_output()
    display(chinaProvinceDD)
    fig8.update_yaxes(type=change['new'])
    fig8.show()


chinaProvinceDD.observe(fig8_on_change)

# Summary column 0 (total confirmed), one line per province
fig8 = go.Figure()
plotGraph(fig8, china_provinces_DB, china_provinces, 0, 'lines')
fig8.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig8.update_layout(
    hovermode="x unified",
    legend_title_text='Province',
    title="Total Confirmed Cases in China",
    xaxis_title="Dates",
    yaxis_title="Total Confirmed Cases",
)
# Summary column 3 (daily confirmed) as bars; only the first province starts visible
fig9 = go.Figure()
plotGraph(fig9, china_provinces_DB, china_provinces, 3, 'bar')
fig9.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig9.update_layout(**{
    'title': "Daily Confirmed Cases in China",
    'xaxis_title': "Dates",
    'yaxis_title': "Daily Confirmed Cases",
    'legend_title_text': 'Province',
    'hovermode': "x unified",
})
chinaProvinceDeathDD = widgets.Dropdown(
    description='Graph type: ',
    options=[('linear', 'linear'), ('logarithmic', 'log')],
)


def fig10_on_change(change):
    """Redraw fig10 with the y-axis scale selected in chinaProvinceDeathDD."""
    if change['type'] != 'change' or change['name'] != 'value':
        return
    clear_output()
    display(chinaProvinceDeathDD)
    fig10.update_yaxes(type=change['new'])
    fig10.show()


chinaProvinceDeathDD.observe(fig10_on_change)

# Summary column 1 (total deaths), one line per province
fig10 = go.Figure()
plotGraph(fig10, china_provinces_DB, china_provinces, 1, 'lines')
fig10.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig10.update_layout(
    hovermode="x unified",
    legend_title_text='Province',
    title="Total Deaths Cases in China",
    xaxis_title="Dates",
    yaxis_title="Total Deaths Cases",
)
# Summary column 4 (daily deaths) as bars; only the first province starts visible
fig11 = go.Figure()
plotGraph(fig11, china_provinces_DB, china_provinces, 4, 'bar')
fig11.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig11.update_layout(**{
    'title': "Daily Deaths Cases in China",
    'xaxis_title': "Dates",
    'yaxis_title': "Daily Deaths Cases",
    'legend_title_text': 'Province',
    'hovermode': "x unified",
})
chinaProvinceRecDD = widgets.Dropdown(
    description='Graph type: ',
    options=[('linear', 'linear'), ('logarithmic', 'log')],
)


def fig12on_change(change):
    """Redraw fig12 with the y-axis scale selected in chinaProvinceRecDD."""
    if change['type'] != 'change' or change['name'] != 'value':
        return
    clear_output()
    display(chinaProvinceRecDD)
    fig12.update_yaxes(type=change['new'])
    fig12.show()


chinaProvinceRecDD.observe(fig12on_change)

# Summary column 2 (total recovered), one line per province
fig12 = go.Figure()
plotGraph(fig12, china_provinces_DB, china_provinces, 2, 'lines')
fig12.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig12.update_layout(
    hovermode="x unified",
    legend_title_text='Province',
    title="Total Recovered Cases in China",
    xaxis_title="Dates",
    yaxis_title="Total Recovered Cases",
)
# Summary column 5 (daily recovered) as bars; only the first province starts visible
fig13 = go.Figure()
plotGraph(fig13, china_provinces_DB, china_provinces, 5, 'bar')
fig13.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig13.update_layout(**{
    'title': "Daily Recovered Cases in China",
    'xaxis_title': "Dates",
    'yaxis_title': "Daily Recovered Cases",
    'legend_title_text': 'Province',
    'hovermode': "x unified",
})
# Summary column 6 (active cases), one line per province
fig14 = go.Figure()
plotGraph(fig14, china_provinces_DB, china_provinces, 6, 'lines')
fig14.update_xaxes(tickfont={'size': 13}, tickangle=315)
fig14.update_layout(
    hovermode="x unified",
    legend_title_text='Province',
    title="Active Cases in China",
    xaxis_title="Dates",
    yaxis_title="Active Cases",
)
# Argument-less display() kept as the cell's final statement.
display()
Please select a province in the dropdown to view its statistics. Note that the descriptive statistics for total confirmed, total death and total recovered cases do not provide valuable information, because these columns are cumulative (aggregated) data.
display(chinaDD)
# Initial table shown before any dropdown selection: statistics for Hubei
df = convertToDF(getProvince(fullCleanDataset, 'Hubei'))
display(df.describe())
display(chinaProvinceDD)
fig8.show()
This graph shows that most of the cases in China come from Hubei. The curve for Hubei also resembles the curve for China as a whole. Compared with Hubei, the numbers in the other provinces are not significant.
This section displays the daily number of confirmed cases. To view different provinces, please click on the names. This is an interactive graph.
fig9.show()
This section displays the total number of death cases. To switch between graph types, use the dropdown. To show or hide a province, click on its name in the legend. This is an interactive graph.
display(chinaProvinceDeathDD)
fig10.show()
This section displays the daily number of death cases. To view different provinces, please click on the names. This is an interactive graph.
fig11.show()
display(chinaProvinceRecDD)
fig12.show()
This section displays the daily number of recovered cases. To view different provinces, please click on the names. This is an interactive graph.
fig13.show()
This section displays the total number of active cases. To view different provinces, please click on the names. This is an interactive graph.
fig14.show()
The number of active cases had decreased dramatically by early April, thanks to the strict movement restrictions put in place.
In this section, we take a look at the breakdown of the coronavirus cases in the US by state, as opposed to the country as a whole in the previous sections.
####################################################### SKIP THIS CELL #######################################################
# State names are matched verbatim against the 'Province/State' column.
# The daily reports name the federal district 'District of Columbia'; the bare
# 'Columbia' would not match any row - NOTE(review): confirm against the CSV files.
US_States = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado', 'Connecticut', 'Delaware',
    'District of Columbia', 'Florida', 'Georgia', 'Guam', 'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa',
    'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
    'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey', 'New Mexico',
    'New York', 'North Carolina', 'North Dakota', 'Northern Mariana Islands', 'Ohio', 'Oklahoma', 'Oregon',
    'Pennsylvania', 'Puerto Rico', 'Rhode Island', 'South Carolina', 'South Dakota', 'Tennessee', 'Texas',
    'Utah', 'Vermont', 'Virgin Islands', 'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
]
# State name -> summary array produced by getState
US_States_DB = dict.fromkeys(US_States)
for state in US_States_DB:
    US_States_DB[state] = getState(fullCleanDataset, state)
################################################################################################################################
usDD = widgets.Dropdown(description='States: ', options=US_States)


def on_change_US(change):
    """Show descriptive statistics for the state newly selected in usDD."""
    value_changed = change['type'] == 'change' and change['name'] == 'value'
    if not value_changed:
        return
    clear_output()
    display(usDD)
    stats = convertToDF(getState(fullCleanDataset, change['new']))
    display(stats.describe())


usDD.observe(on_change_US)
################################################################################################################################
##function
def plotGraph(fig, db, list, index, chart):
    """Add one trace per name in `list` to `fig`, reading column `index` of db[name].

    'lines' adds every trace with default visibility; 'bar' shows only the
    first trace and adds the others as 'legendonly'. Other values add nothing.
    """
    if chart == 'lines':
        for item in list:
            addTrace(fig, item, db[item], index, chart)
        return
    if chart == 'bar':
        first = True
        for item in list:
            if first:
                addTrace(fig, item, db[item], index, chart)
                first = False
            else:
                addTrace(fig, item, db[item], index, chart, 'legendonly')


# Shared x-axis labels ("Jan 22", "Jan 23", ...), one per day in the range
x_dates = pd.date_range(start="01-22-2020", end="05-22-2020").to_pydatetime()
for i, stamp in enumerate(x_dates):
    x_dates[i] = stamp.strftime("%b %d")


def addTrace(fig, province, dataset, index, chart, visible=True):
    """Add column `index` of `dataset` to `fig` as a trace named `province`.

    chart == 'bar' adds a go.Bar that honours `visible`; any other value adds
    a go.Scatter using `chart` as its mode.
    """
    values = dataset[:, index]
    if chart == 'bar':
        new_trace = go.Bar(name=province, x=x_dates, y=values, visible=visible)
    else:
        new_trace = go.Scatter(name=province, x=x_dates, y=values, mode=chart)
    fig.add_trace(new_trace)
# Selector for the y-axis scale of the total confirmed cases chart (fig15).
usStateDD = widgets.Dropdown(
    description='Graph type: ',
    options=[('linear', 'linear'), ('logarithmic', 'log')],
)


def fig15_on_change(change):
    """Apply the axis type chosen in ``usStateDD`` to fig15 and redraw it.

    Args:
        change: Notification dict from the widget observer; only
            notifications with type 'change' and name 'value' are handled,
            and 'new' is used as the y-axis type.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return
    # Clearing the output also removes the dropdown, so show it again first.
    clear_output()
    display(usStateDD)
    fig15.update_yaxes(type=change['new'])
    fig15.show()


usStateDD.observe(fig15_on_change)
# fig15: column 0 of every state's data, one line per state.
fig15 = go.Figure()
plotGraph(fig15, US_States_DB, US_States, 0, 'lines')
fig15.update_layout(
    title="Total Confirmed Cases in the US",
    xaxis_title="Dates",
    yaxis_title="Total Confirmed Cases",
    legend_title_text='State',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})

# fig16: column 3 of every state's data as bars; only the first state starts visible.
fig16 = go.Figure()
plotGraph(fig16, US_States_DB, US_States, 3, 'bar')
fig16.update_layout(
    title="Daily Confirmed Cases in US",
    xaxis_title="Dates",
    yaxis_title="Daily Confirmed Cases",
    legend_title_text='State',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})
# Selector for the y-axis scale of the total deaths chart (fig17).
usStateDeathDD = widgets.Dropdown(
    description='Graph type: ',
    options=[('linear', 'linear'), ('logarithmic', 'log')],
)


def fig17_on_change(change):
    """Apply the axis type chosen in ``usStateDeathDD`` to fig17 and redraw it.

    Args:
        change: Notification dict from the widget observer; only
            notifications with type 'change' and name 'value' are handled,
            and 'new' is used as the y-axis type.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return
    # Clearing the output also removes the dropdown, so show it again first.
    clear_output()
    display(usStateDeathDD)
    fig17.update_yaxes(type=change['new'])
    fig17.show()


usStateDeathDD.observe(fig17_on_change)
# fig17: column 1 of every state's data, one line per state.
fig17 = go.Figure()
plotGraph(fig17, US_States_DB, US_States, 1, 'lines')
fig17.update_layout(
    title="Total Deaths Cases in US",
    xaxis_title="Dates",
    yaxis_title="Total Deaths Cases",
    legend_title_text='State',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})

# fig18: column 4 of every state's data as bars; only the first state starts visible.
fig18 = go.Figure()
plotGraph(fig18, US_States_DB, US_States, 4, 'bar')
fig18.update_layout(
    title="Daily Deaths Cases in US",
    xaxis_title="Dates",
    yaxis_title="Daily Deaths Cases",
    legend_title_text='State',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})
# Selector for the y-axis scale of the total recovered chart (fig19).
usStateRecDD = widgets.Dropdown(
    description='Graph type: ',
    options=[('linear', 'linear'), ('logarithmic', 'log')],
)


def fig19on_change(change):
    """Apply the axis type chosen in ``usStateRecDD`` to fig19 and redraw it.

    Args:
        change: Notification dict from the widget observer; only
            notifications with type 'change' and name 'value' are handled,
            and 'new' is used as the y-axis type.
    """
    if change['type'] != 'change' or change['name'] != 'value':
        return
    # Clearing the output also removes the dropdown, so show it again first.
    clear_output()
    display(usStateRecDD)
    fig19.update_yaxes(type=change['new'])
    fig19.show()


usStateRecDD.observe(fig19on_change)
# fig19: column 2 of every state's data, one line per state.
fig19 = go.Figure()
plotGraph(fig19, US_States_DB, US_States, 2, 'lines')
fig19.update_layout(
    title="Total Recovered Cases in US",
    xaxis_title="Dates",
    yaxis_title="Total Recovered Cases",
    legend_title_text='State',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})

# fig20: column 5 of every state's data as bars; only the first state starts visible.
fig20 = go.Figure()
plotGraph(fig20, US_States_DB, US_States, 5, 'bar')
fig20.update_layout(
    title="Daily Recovered Cases in US",
    xaxis_title="Dates",
    yaxis_title="Daily Recovered Cases",
    legend_title_text='State',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})

# fig21: column 6 of every state's data, one line per state.
fig21 = go.Figure()
plotGraph(fig21, US_States_DB, US_States, 6, 'lines')
fig21.update_layout(
    title="Active Cases in US",
    xaxis_title="Dates",
    yaxis_title="Active Cases",
    legend_title_text='State',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})

display()
Please select a state from the dropdown to view its statistics. Note, however, that the descriptive statistics for total confirmed cases, total death cases and total recovered cases do not provide valuable information, as they are aggregated data.
# Initial view: the state selector with Alabama's summary statistics,
# followed by the axis-type selector and the total confirmed cases chart.
display(usDD)
df = convertToDF(getState(fullCleanDataset, 'Alabama'))
display(df.describe())
display(usStateDD)
fig15.show()
This section displays the daily number of confirmed cases. To view different states, please click on the names. This is an interactive graph.
# Render the per-state "Daily Confirmed Cases in US" bar chart.
fig16.show()
This section displays the total number of death cases. To switch between graph types, use the dropdown. To view different states, please click on the names. This is an interactive graph.
# Show the linear/logarithmic selector, then the per-state total deaths chart.
display(usStateDeathDD)
fig17.show()
This section displays the daily number of death cases. To view different states, please click on the names. This is an interactive graph.
# Daily deaths bar chart, then the axis-type selector and the total recovered line chart.
fig18.show()
display(usStateRecDD)
fig19.show()
This section displays the daily number of recovered cases. To view different states, please click on the names. This is an interactive graph.
# Render the per-state "Daily Recovered Cases in US" bar chart.
fig20.show()
Since recovered cases are not recorded at the state level, we could not compute the daily recovered cases either. For the daily number of recovered cases, please refer back to the graph shown in the Top Countries section.
This section displays the total number of active cases. To view different states, please click on the names. This is an interactive graph.
# Render the per-state "Active Cases in US" line chart.
fig21.show()
None of the states in the US show signs of flattening the curve.
####################################################### SKIP THIS CELL #######################################################
# Country-level comparison: column 0 for the US against China.
US = getCountry(fullCleanDataset, 'US')
China = getCountry(fullCleanDataset, 'China')

fig22 = go.Figure()
addTrace(fig22, 'US', US, 0, 'lines')
addTrace(fig22, 'China', China, 0, 'lines')
fig22.update_layout(
    title="USA vs China Total Confirmed Cases",
    xaxis_title="Dates",
    yaxis_title="Total Confirmed Cases",
    legend_title_text='Country',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})

# Regional comparison: column 0 for Florida (state) against Shanghai (province).
shanghai = getProvince(fullCleanDataset, 'Shanghai')
florida = getState(fullCleanDataset, 'Florida')

fig23 = go.Figure()
addTrace(fig23, 'Florida', florida, 0, 'lines')
addTrace(fig23, 'Shanghai', shanghai, 0, 'lines')
fig23.update_layout(
    title="Most Dense Areas Total Confirmed Cases",
    xaxis_title="Dates",
    yaxis_title="Total Confirmed Cases",
    legend_title_text='State/Province',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})

display()
fig22.show()
fig23.show()
An overview of cases in Malaysia.
####################################################### SKIP THIS CELL #######################################################
malaysia = getCountry(fullCleanDataset, "Malaysia")

# Line chart: the three running totals (columns 0-2) plus active cases (column 6).
figMsia = go.Figure()
addTrace(figMsia, 'Total Confirmed Cases', malaysia, 0, 'lines')
addTrace(figMsia, 'Total Deaths Cases', malaysia, 1, 'lines')
addTrace(figMsia, 'Total Recovered Cases', malaysia, 2, 'lines')
addTrace(figMsia, 'Active Cases', malaysia, 6, 'lines')
figMsia.update_layout(
    title="Malaysia (Line)",
    xaxis_title="Dates",
    yaxis_title="Number of Cases",
    legend_title_text='',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})

# Bar chart: the three daily series (columns 3-5).
figMsia2 = go.Figure()
addTrace(figMsia2, 'Daily Confirmed Cases', malaysia, 3, 'bar')
addTrace(figMsia2, 'Daily Deaths Cases', malaysia, 4, 'bar')
addTrace(figMsia2, 'Daily Recovered Cases', malaysia, 5, 'bar')
figMsia2.update_layout(
    title="Malaysia (Bar)",
    xaxis_title="Dates",
    yaxis_title="Number of Cases",
    legend_title_text='',
    hovermode="x unified",
).update_xaxes(tickangle=315, tickfont={'size': 13})

display()
The descriptive statistics for total confirmed cases, total death cases and total recovered cases do not provide valuable information, as they are aggregated data.
# Convert the Malaysia data with convertToDF and show its summary statistics.
df = convertToDF(malaysia)
display(df.describe())
To view different line charts, please click on the names. This is an interactive graph.
# Render the "Malaysia (Line)" chart.
display(figMsia)
To view different bar charts, please click on the names. This is an interactive graph.
# Render the "Malaysia (Bar)" chart.
display(figMsia2)
This graph combines total confirmed, death, recovered and active cases in a bar graph form.

def predict(countryDataset, index, title, futureDays):
    """Fit an RBF support-vector regressor to one column and plot a forecast.

    The day number (0 = 22 Jan 2020) is the only feature. Hyper-parameters
    are picked by a randomized search; the best estimator is scored against
    the observed data and then evaluated ``futureDays`` days past the last
    observed day.

    Args:
        countryDataset: 2-D array read as ``countryDataset[:n_days, index]``.
            Presumably one row per day starting 22 Jan 2020 -- confirm
            against the helper that builds it.
        index: Column of ``countryDataset`` to model.
        title: Text used for the y-axis label and as the chart title prefix.
        futureDays: Number of days to forecast beyond 22 May 2020.

    Side effects:
        Clears the current cell output, prints the selected estimator and
        its R^2 score on the observed data, and shows the Plotly figure.
    """
    label_format = "%b %d"

    # Derive the observed and forecast horizons from one local date range so
    # the training data and the forecast axis cannot drift apart.
    observed_days = pd.date_range(start="01-22-2020", end="05-22-2020").to_pydatetime()
    n_observed = len(observed_days)
    n_total = n_observed + futureDays
    forecast_days = pd.date_range(start="01-22-2020", periods=n_total).to_pydatetime()
    observed_labels = [day.strftime(label_format) for day in observed_days]
    forecast_labels = [day.strftime(label_format) for day in forecast_days]

    # Training data: day number -> observed value.
    X = np.arange(n_observed).reshape(-1, 1)
    y = countryDataset[:n_observed, index]
    # Prediction inputs: observed days followed by the forecast days.
    X2 = np.arange(n_total).reshape(-1, 1)

    # SVM parameters
    svm_grid = {
        'kernel': ['rbf'],
        'C': [0.01, 0.1, 1, 10, 100, 500, 750, 1000, 1250],
        'gamma': [0.01, 0.1, 1],
        'epsilon': [0.01, 0.1, 1],
        'shrinking': [True, False],
    }
    svm_search = sklearn.model_selection.RandomizedSearchCV(
        SVR(), svm_grid, scoring='neg_mean_squared_error',
        cv=3, return_train_score=True, n_jobs=1, n_iter=50, verbose=1)
    svm_search.fit(X, y)

    # predict with best parameters
    svm_best = svm_search.best_estimator_
    # accuracy_score is a classification metric (fraction of exact matches),
    # which is meaningless for a regressor; report R^2 instead.
    score = svm_best.score(X, y)
    y_pred = svm_best.predict(X2).astype(int)

    pred_fig = go.Figure()
    pred_fig.add_trace(go.Scatter(
        name='Actual cases',
        # Observed values only exist for the observed days, so pair them with
        # the observed labels rather than the longer forecast axis.
        x=observed_labels,
        y=y,
        mode='lines',
    ))
    pred_fig.add_trace(go.Scatter(
        name='Predicted cases',
        x=forecast_labels,
        y=y_pred,
        mode='lines',
    ))
    pred_fig.update_layout(
        title=title + " (Actual vs Prediction)",
        xaxis_title="Dates",
        yaxis_title=title,
        legend_title_text='',
        hovermode="x unified",
    )
    pred_fig.update_xaxes(tickangle=315, tickfont=dict(size=13))

    # Drop the verbose search log before printing the summary.
    clear_output()
    print("svm parameter: ")
    print(svm_best)
    print("R^2 score: ")
    print(score)
    pred_fig.show()
# Column layout as used by the Malaysia charts: 0 = total confirmed,
# 3 = daily confirmed, 6 = active. The second call previously passed
# column 1 (total deaths) under the "Daily Confirmed Cases" title.
predict(malaysia, 0, "Total Confirmed Cases", 30)
predict(malaysia, 3, "Daily Confirmed Cases", 30)
predict(malaysia, 6, "Active Cases", 30)